# -*- coding: utf-8 -*-
"""SPAP_2021_LSQ_TableS14_FigureS13

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/163pJ9lK3KUkgZ7SJIbUNd-VxX5Xrz1IX

**Set up**
"""

# Mount Google Drive at /content/drive so the files under `data_path`
# (defined below) can be read and written. `google.colab` is only importable
# inside a Colab runtime, so this script is not runnable elsewhere as-is.
from google.colab import drive
drive.mount('/content/drive')

# import packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

"""**Generate Data for Table S14**"""

# Folder on the mounted Drive that holds the input CSV; the figure produced
# at the end of the script is written to the same folder.
data_path = '/content/drive/My Drive/spap_state/spap_state_attention/data/'

# Load the tweet-level table that carries the Trump / governor mention flags.
_mention_csv = data_path + 'spap_state_attention_supplementary_mention.csv'
df_reg_pandemic_final = pd.read_csv(_mention_csv)

def _summarize_mentions(tweets, ratio_col):
    """Print the Table S14 shares for `tweets` and build its weekly ratio table.

    Prints three numbers, in this order:
      1. share of rows that mention Trump,
      2. share of rows that mention a governor,
      3. Trump mentions divided by governor mentions.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Must contain the columns 'week', 'mention_trump_all' and
        'mention_governor_all'. The mention columns are summed, so they are
        assumed to be boolean (or 0/1) flags.
    ratio_col : str
        Name of the column that receives the weekly Trump/governor ratio.

    Returns
    -------
    tuple of pandas.DataFrame
        (weekly Trump mention counts, weekly governor mention counts,
        the two merged on 'week' with `ratio_col` added).
    """
    n_rows = len(tweets)
    # Series.sum() is vectorised; the builtin sum() walks the column in Python.
    n_trump = tweets['mention_trump_all'].sum()
    n_governor = tweets['mention_governor_all'].sum()
    print(n_trump / n_rows)
    print(n_governor / n_rows)
    print(n_trump / n_governor)

    # One row per week with the number of mentions in that week.
    weekly_trump = tweets.groupby('week')['mention_trump_all'].sum().reset_index()
    weekly_governor = tweets.groupby('week')['mention_governor_all'].sum().reset_index()
    weekly = pd.merge(weekly_trump, weekly_governor, how='left', on='week')
    weekly[ratio_col] = weekly['mention_trump_all'] / weekly['mention_governor_all']
    return weekly_trump, weekly_governor, weekly


# subset by mentions
df_reg_pandemic_final_trump = df_reg_pandemic_final[
    df_reg_pandemic_final['mention_trump_all'] == True]
df_reg_pandemic_final_governors = df_reg_pandemic_final[
    df_reg_pandemic_final['mention_governor_all'] == True]

# table s14 statistics and weekly mention counts: all tweets
# expected output: 0.9% / 7.4% / 12.1%
weekly_trump, weekly_governor, weekly_all = _summarize_mentions(
    df_reg_pandemic_final, 'ratio_all')

# do the analysis by party: republican
df_reg_pandemic_final_rep = df_reg_pandemic_final[df_reg_pandemic_final['party'] == 'R']
df_reg_pandemic_final_rep_trump = df_reg_pandemic_final_rep[
    df_reg_pandemic_final_rep['mention_trump_all'] == True]
df_reg_pandemic_final_rep_governors = df_reg_pandemic_final_rep[
    df_reg_pandemic_final_rep['mention_governor_all'] == True]

# expected output: 1.8% / 9.3% / 19.9%
weekly_rep_trump, weekly_rep_governor, weekly_rep_all = _summarize_mentions(
    df_reg_pandemic_final_rep, 'rep_ratio')

# do the analysis by party: non-republican (every row whose party is not 'R')
df_reg_pandemic_final_nonrep = df_reg_pandemic_final[df_reg_pandemic_final['party'] != 'R']
df_reg_pandemic_final_nonrep_trump = df_reg_pandemic_final_nonrep[
    df_reg_pandemic_final_nonrep['mention_trump_all'] == True]
df_reg_pandemic_final_nonrep_governors = df_reg_pandemic_final_nonrep[
    df_reg_pandemic_final_nonrep['mention_governor_all'] == True]

# NOTE(review): the notes previously attached to these prints repeated the
# Republican figures (1.8% / 9.3% / 19.9%). Those cannot be right: the overall
# Trump share (0.9%) is a weighted average of the two party shares, so the
# non-Republican share must be below 0.9%. TODO: record the actual output.
weekly_nonrep_trump, weekly_nonrep_governor, weekly_nonrep_all = _summarize_mentions(
    df_reg_pandemic_final_nonrep, 'nonrep_ratio')

"""**Generate Figure S13**"""

# Combine the overall, Republican and non-Republican weekly ratios in one table.
# Joining on 'week' (instead of concatenating side by side on row position)
# keeps each ratio on the right week even if one party has no rows in some week.
timeline_mention = weekly_all.merge(
    weekly_rep_all[['week', 'rep_ratio']], how='left', on='week'
).merge(
    weekly_nonrep_all[['week', 'nonrep_ratio']], how='left', on='week'
)

# Calendar axis: one date per weekly row, starting from 30 March 2020.
# freq='W' is pandas' Sunday-anchored weekly alias, so the dates are Sundays.
# - The number of periods follows the data instead of a hard-coded 30, which
#   would raise a length-mismatch error for any other number of weeks.
# - The former `closed='left'` argument is omitted: it had no effect without
#   an `end` bound and the keyword was removed in pandas 2.0.
timeline_mention['time'] = pd.date_range(start='3/30/2020',
                                         periods=len(timeline_mention),
                                         freq='W')

fig, ax = plt.subplots()

# Left axis: the three Trump/governor mention ratios as lines.
for _column, _label, _color in (
        ('ratio_all', "Ratio in All Tweets", 'dimgray'),
        ('rep_ratio', "Ratio in Republican Tweets", 'darkred'),
        ('nonrep_ratio', "Ratio in Non-Republican Tweets", 'tab:blue'),
):
    ax.plot(timeline_mention['time'],
            timeline_mention[_column],
            label=_label,
            color=_color)
ax.set_xlabel("\n Time", size=12.5)
ax.set_ylabel("Ratio (Trump Mentions / Gov. Mentions) \n", size=12.5)
ax.legend(loc='upper center')

# Right axis: weekly total of Trump + governor mentions as faint bars.
# The bar label is not shown, because the legend above is built from `ax` only.
ax2 = ax.twinx()
total_mentions = (timeline_mention["mention_governor_all"]
                  + timeline_mention["mention_trump_all"])
ax2.bar(timeline_mention['time'],
        total_mentions,
        color='gray',
        width=4,
        alpha=0.1,
        label='Number of All Tweets')
ax2.set_ylabel("\n Total Mentions (Trump Mentions + Gov. Mentions)", size=12.5)

fig.set_size_inches(10, 6, forward=True)
fig.savefig(data_path + 'FigS13.png', dpi=600)